library(socviz)
library(lubridate)
library(geofacet)
library(ggthemes)
library(ggrepel)
library(ggridges)
library(plyr)
library(skimr)
library(tidyverse)
library(gganimate)
library(plotly)
# Make the minimal ggplot2 theme the session default; plots below that do not
# add their own theme layer are drawn with it.
ggplot2::theme_set(ggplot2::theme_minimal())
# ---- 2015 survey ----
# Load the 2015 file, replace the verbose column headers with the short names
# shared by every yearly table in this script, and tag each row with its year.
h15 <- read_csv("Happiness_Data/2015.csv") %>%
  dplyr::rename(
    H_rank       = `Happiness Rank`,
    H_score      = `Happiness Score`,
    SE           = `Standard Error`,
    GDP          = `Economy (GDP per Capita)`,
    Health       = `Health (Life Expectancy)`,
    Trust        = `Trust (Government Corruption)`,
    dystopia_res = `Dystopia Residual`
  ) %>%
  dplyr::mutate(Year = 2015)
# ---- 2016 survey ----
# The 2016 file carries a confidence interval rather than a standard error, so
# the standard error is derived from the interval width and the two interval
# columns are then dropped. Remaining headers get the shared short names.
h16 <- read_csv("Happiness_Data/2016.csv") %>%
  dplyr::mutate(
    Year = 2016,
    # SE = (upper limit - lower limit) / 3.92; the divisor 3.92 = 2 * 1.96
    # assumes the interval in the file is a 95% confidence interval.
    SE = (`Upper Confidence Interval` - `Lower Confidence Interval`) / 3.92
  ) %>%
  dplyr::select(-c(`Upper Confidence Interval`, `Lower Confidence Interval`)) %>%
  dplyr::rename(
    H_rank       = `Happiness Rank`,
    H_score      = `Happiness Score`,
    GDP          = `Economy (GDP per Capita)`,
    Health       = `Health (Life Expectancy)`,
    Trust        = `Trust (Government Corruption)`,
    dystopia_res = `Dystopia Residual`
  )
# The 2017-2019 tables are given a Region column by joining against this
# Country -> Region lookup, which is taken from the 2016 table. Countries that
# do not appear in the 2016 table therefore end up with a missing Region.
h_regions <- h16 %>%
  dplyr::select(Country, Region)
# ---- 2017 survey ----
# The 2017 file uses dotted column names and reports whiskers instead of a
# standard error. Steps:
#   1. tag the year and derive the standard error from the whisker width
#      (same /3.92 rule as for 2016; assumes the whiskers span a 95% interval),
#   2. attach Region from the 2016 lookup, keeping every 2017 row even when
#      the country has no match (Region is NA for those rows),
#   3. drop the whisker columns and apply the shared short column names.
# Note: base::merge() returns a plain data.frame sorted by the join key.
h17 <- read_csv("Happiness_Data/2017.csv")
h17 <- h17 %>%
  dplyr::mutate(
    Year = 2017,
    `Standard Error` = (`Whisker.high` - `Whisker.low`) / 3.92
  ) %>%
  # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
  merge(h_regions, by = "Country", all.x = TRUE) %>%
  dplyr::select(-c(`Whisker.high`, `Whisker.low`)) %>%
  dplyr::rename(
    H_rank       = `Happiness.Rank`,
    H_score      = Happiness.Score,
    GDP          = Economy..GDP.per.Capita.,
    Health       = Health..Life.Expectancy.,
    Trust        = Trust..Government.Corruption.,
    SE           = `Standard Error`,
    dystopia_res = Dystopia.Residual
  )
# ---- 2018 survey ----
# The 2018 file has a different layout again: different headers, no Region and
# no dystopia residual. Steps:
#   1. tag the year and apply the shared short column names,
#   2. attach Region from the 2016 lookup, keeping unmatched countries
#      (Region is NA for those rows),
#   3. make Trust numeric and rebuild the dystopia residual as the score minus
#      the sum of the six explanatory components.
#
# Trust handling: the previous code coerced Trust only inside the residual
# expression, which left the column itself in whatever type it was read as.
# NOTE(review): that coercion suggests the column is parsed as text, presumably
# because of an "N/A" entry -- confirm against the CSV. Reading "N/A" as
# missing and converting the column itself keeps Trust numeric, so it stacks
# cleanly with the numeric Trust columns of the other years.
h18 <- read_csv("Happiness_Data/2018.csv", na = c("", "NA", "N/A"))
h18 <- h18 %>%
  dplyr::mutate(Year = 2018) %>%
  dplyr::rename(
    H_rank  = `Overall rank`,
    H_score = `Score`,
    GDP     = `GDP per capita`,
    Country = `Country or region`,
    Health  = `Healthy life expectancy`,
    Trust   = `Perceptions of corruption`,
    Freedom = `Freedom to make life choices`,
    Family  = `Social support`
  ) %>%
  # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
  merge(h_regions, by = "Country", all.x = TRUE) %>%
  dplyr::mutate(
    # No-op when the column was already parsed as numeric.
    Trust = as.numeric(Trust),
    # Rows with a missing Trust value get a missing residual.
    dystopia_res = H_score -
      (GDP + Family + Health + Freedom + Generosity + Trust)
  )
# ---- 2019 survey ----
# Same layout and same treatment as the 2018 file:
#   1. tag the year and apply the shared short column names,
#   2. attach Region from the 2016 lookup, keeping unmatched countries
#      (Region is NA for those rows),
#   3. make Trust numeric and rebuild the dystopia residual as the score minus
#      the sum of the six explanatory components.
#
# "N/A" is read as missing and Trust is converted in place so the column is
# numeric regardless of how it was parsed; both steps are harmless when the
# file contains only numbers in that column.
h19 <- read_csv("Happiness_Data/2019.csv", na = c("", "NA", "N/A"))
h19 <- h19 %>%
  dplyr::mutate(Year = 2019) %>%
  dplyr::rename(
    H_rank  = `Overall rank`,
    H_score = `Score`,
    GDP     = `GDP per capita`,
    Country = `Country or region`,
    Health  = `Healthy life expectancy`,
    Trust   = `Perceptions of corruption`,
    Freedom = `Freedom to make life choices`,
    Family  = `Social support`
  ) %>%
  # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
  merge(h_regions, by = "Country", all.x = TRUE) %>%
  dplyr::mutate(
    Trust = as.numeric(Trust),
    dystopia_res = H_score -
      (GDP + Family + Health + Freedom + Generosity + Trust)
  )
# ---- All years stacked ----
# Stack the five yearly tables into one tibble. plyr::rbind.fill() pads any
# column that a given year lacks with NA. Country names are lower-cased
# and, like Region, stored as factors.
h_alldat <- rbind.fill(h15, h16, h17, h18, h19) %>%
  tibble() %>%
  dplyr::mutate(Country = tolower(Country)) %>%
  dplyr::mutate(
    Country = as.factor(Country),
    Region  = as.factor(Region)
  )
# Per-region summary statistics of happiness rank and score, rendered as a
# table with knitr::kable().
# (Alternative, disabled: browse the full data with
#  rmarkdown::paged_table(h_alldat).)
h_alldat %>%
  papeR::summarize_numeric(
    type = "numeric",
    group = "Region",
    variables = c("H_rank", "H_score"),
    test = FALSE
  ) %>%
  knitr::kable()
| | | Region | N | Mean | SD | Min | Q1 | Median | Q3 | Max |
|---|---|---|---|---|---|---|---|---|---|---|
| 1 | H_rank | Australia and New Zealand | 10 | 9.10 | 1.10 | 8.00 | 8.00 | 9.00 | 10.00 | 11.00 |
| 1.1 | | Central and Eastern Europe | 144 | 75.63 | 26.80 | 20.00 | 55.50 | 73.00 | 91.50 | 138.00 |
| 1.2 | | Eastern Asia | 28 | 66.32 | 22.54 | 25.00 | 52.00 | 65.00 | 83.50 | 101.00 |
| 1.3 | | Latin America and Caribbean | 109 | 49.75 | 29.97 | 12.00 | 28.00 | 43.00 | 63.00 | 148.00 |
| 1.4 | | Middle East and Northern Africa | 96 | 79.56 | 41.40 | 11.00 | 39.00 | 83.00 | 109.00 | 156.00 |
| 1.5 | | North America | 10 | 11.30 | 5.14 | 5.00 | 7.00 | 11.00 | 15.00 | 19.00 |
| 1.6 | | Southeastern Asia | 44 | 80.55 | 35.45 | 22.00 | 46.50 | 81.50 | 107.00 | 145.00 |
| 1.7 | | Southern Asia | 35 | 112.46 | 23.31 | 67.00 | 97.00 | 115.00 | 127.50 | 154.00 |
| 1.8 | | Sub-Saharan Africa | 185 | 126.86 | 21.41 | 55.00 | 114.00 | 131.00 | 143.00 | 158.00 |
| 1.9 | | Western Europe | 103 | 26.12 | 26.36 | 1.00 | 6.00 | 17.00 | 36.00 | 102.00 |
| 2 | H_score | Australia and New Zealand | 10 | 7.29 | 0.03 | 7.23 | 7.28 | 7.30 | 7.31 | 7.33 |
| 2.1 | | Central and Eastern Europe | 144 | 5.43 | 0.59 | 4.10 | 5.13 | 5.50 | 5.85 | 6.85 |
| 2.2 | | Eastern Asia | 28 | 5.63 | 0.47 | 4.87 | 5.25 | 5.65 | 5.92 | 6.45 |
| 2.3 | | Latin America and Caribbean | 109 | 6.02 | 0.73 | 3.58 | 5.74 | 6.12 | 6.48 | 7.23 |
| 2.4 | | Middle East and Northern Africa | 96 | 5.34 | 1.02 | 3.01 | 4.69 | 5.27 | 6.23 | 7.28 |
| 2.5 | | North America | 10 | 7.17 | 0.20 | 6.89 | 6.99 | 7.20 | 7.33 | 7.43 |
| 2.6 | | Southeastern Asia | 44 | 5.34 | 0.80 | 3.82 | 4.75 | 5.27 | 6.04 | 6.80 |
| 2.7 | | Southern Asia | 35 | 4.58 | 0.59 | 3.20 | 4.34 | 4.57 | 5.05 | 5.65 |
| 2.8 | | Sub-Saharan Africa | 185 | 4.21 | 0.59 | 2.84 | 3.78 | 4.22 | 4.57 | 5.89 |
| 2.9 | | Western Europe | 103 | 6.76 | 0.75 | 4.86 | 6.34 | 6.94 | 7.41 | 7.77 |
# ---- Deaths by risk factor ----
# Load the deaths-by-risk-factor table, keep the rows with Year after 2015 and
# sort them by year, then show the result as a paged table.
#
# The file is read through the same project-relative "Happiness_Data/" folder
# as the yearly happiness files; the previous absolute "/Volumes/..." path only
# resolved on the machine it was written on.
# NOTE(review): `Year > 2015` excludes 2015 although the happiness data above
# starts in 2015 -- confirm whether `>=` was intended.
death_dat <- read_csv("Happiness_Data/number-of-deaths-by-risk-factor.csv")
death_dat <- death_dat %>%
  dplyr::filter(Year > 2015) %>%
  dplyr::arrange(Year)
rmarkdown::paged_table(death_dat)
# ---- Country profile variables ----
# Load the country profile table and prepare it for joining with the happiness
# data: lower-case the country names (h_alldat stores them lower-cased), rename
# the key column to `Country`, and drop the profile's own Region column so it
# does not clash with the Region already present in h_alldat.
#
# The file is read through the same project-relative "Happiness_Data/" folder
# as the yearly happiness files; the previous absolute "/Volumes/..." path only
# resolved on the machine it was written on.
country_profile <- read_csv("Happiness_Data/kiva_country_profile_variables.csv")
country_profile <- country_profile %>%
  dplyr::mutate(country = tolower(country)) %>%
  dplyr::rename(Country = country) %>%
  dplyr::select(-c(Region))
# merge() with its defaults keeps only the countries present in both tables.
h_p_alldat <- merge(h_alldat, country_profile, by = "Country")
rmarkdown::paged_table(country_profile)
# Countries whose happiness rank, averaged over all of their rows in h_alldat,
# is 10 or better. Selection is by that threshold, so the number of rows is
# not forced to be exactly ten. Rows are listed from the largest mean rank to
# the smallest.
top_10 <- h_alldat %>%
  dplyr::group_by(Country) %>%
  dplyr::summarise(mean_rank = mean(H_rank)) %>%
  dplyr::filter(mean_rank <= 10) %>%
  dplyr::arrange(dplyr::desc(mean_rank))
rmarkdown::paged_table(top_10)
# Horizontal box plots of happiness score per region, with regions ordered by
# reorder()'s default summary of H_score (the mean).
# Rows without a Region are removed with is.na(): the earlier string comparison
# `Region != "NA"` only dropped them because a comparison with NA yields NA,
# which filter() discards.
ggplot(dplyr::filter(h_alldat, !is.na(Region))) +
  geom_boxplot(aes(x = H_score, y = reorder(Region, H_score), color = Region)) +
  theme_classic() +
  # "none" (lower case) is the documented value for hiding the legend; the
  # y axis already names each region.
  theme(legend.position = "none") +
  labs(x = "Happiness Scores", y = "Regions")

# Scatter plot of happiness score against the GDP component, coloured by
# region, pooling all years.
# Rows without a Region are removed with is.na(): the earlier string comparison
# `Region != "NA"` only dropped them because a comparison with NA yields NA,
# which filter() discards.
ggplot(
  dplyr::filter(h_alldat, !is.na(Region)),
  aes(x = GDP, y = H_score, color = Region)
) +
  geom_point() +
  theme_classic()
# ---- Animated GDP vs. happiness scatter (plotly) ----
# `base` holds the shared plot definition: GDP on x, happiness score on y, and
# the country name as the only hover text.
base <- plot_ly(
  data = h_alldat,
  x = ~GDP,
  y = ~H_score,
  text = ~Country,
  hoverinfo = "text",
  width = 800,
  height = 500
)
# One animation frame per Year; `ids = ~Country` ties each marker to the same
# country across frames. The play button is anchored at the bottom right and
# the slider label reads "YEAR <value>" in red.
base %>%
  add_markers(color = ~Region, frame = ~Year, ids = ~Country) %>%
  animation_opts(frame = 1000, easing = "elastic", redraw = FALSE) %>%
  animation_button(x = 1, xanchor = "right", y = 0, yanchor = "bottom") %>%
  animation_slider(
    currentvalue = list(
      prefix = "YEAR ",
      font = list(color = "red")
    )
  )
# ---- World map polygons joined to the happiness data ----
# map_data("world") returns polygon outlines with the country name in `region`.
# That column is renamed to `Country`, lower-cased to match h_alldat, and three
# names are rewritten to the spellings used in the happiness tables.
world_map <- map_data("world")
world <- world_map %>%
  dplyr::rename(Country = region) %>%
  dplyr::mutate(
    Country = str_to_lower(Country),
    # Map-specific spellings -> happiness-data spellings; every other name is
    # passed through unchanged.
    Country = dplyr::case_when(
      Country == "usa" ~ "united states",
      Country == "democratic republic of the congo" ~ "congo (kinshasa)",
      Country == "republic of congo" ~ "congo (brazzaville)",
      TRUE ~ Country
    ),
    Country = as.factor(Country)
  )
# left_join() already keeps every row of h_alldat. The former `all.x = TRUE`
# is a base::merge() argument that left_join() does not define; it has no
# effect there and newer dplyr releases may reject it as an unexpected
# argument, so it is removed.
# Each h_alldat row matches every polygon vertex of its country, so the result
# has many rows per country and year.
h_alldat_world <- left_join(h_alldat, world, by = "Country")
# ---- Animated world map of happiness score ----
# Country polygons filled by happiness score on a white-to-orange gradient;
# missing scores get no fill. The `frame` aesthetic is what ggplotly() uses to
# build one animation frame per Year.
p <- ggplot(
  h_alldat_world,
  aes(
    x = long,
    y = lat,
    group = group,
    fill = H_score,
    frame = Year
  )
) +
  geom_polygon(na.rm = TRUE) +
  scale_fill_gradient(low = "white", high = "#FD8104", na.value = NA) +
  theme_map()
# Convert to an interactive plotly widget and set the animation timing.
plotly::ggplotly(p) %>%
  animation_opts(
    frame = 1000,
    transition = 0,
    easing = "elastic",
    redraw = FALSE
  )